from pyspark.sql import SparkSession


def main() -> None:
    """Run a word-count job over /user/example.txt and print each word's count.

    Connects to the standalone Spark master at hadoop102:7077, reads the text
    file from HDFS, computes per-word counts via the classic RDD pipeline, and
    prints the results on the driver.
    """
    # Create (or reuse) a SparkSession attached to the standalone cluster.
    spark = (
        SparkSession.builder
        .appName("WordCount")
        .master("spark://hadoop102:7077")
        .getOrCreate()
    )
    try:
        # spark.read.text yields one Row per line with a single string
        # column named "value".
        text_file = spark.read.text("/user/example.txt")

        # Classic word count: split each line on whitespace, pair each word
        # with 1, then sum counts per word.
        counts = (
            text_file.rdd
            .flatMap(lambda row: row.value.split())
            .map(lambda word: (word, 1))
            .reduceByKey(lambda a, b: a + b)
        )

        # NOTE: collect() pulls every result to the driver — fine for a small
        # example; prefer saveAsTextFile/DataFrame writes for large outputs.
        for word, count in counts.collect():
            print(f"{word}: {count}")
    finally:
        # Always release executors/cluster resources, even on failure.
        spark.stop()


if __name__ == "__main__":
    main()